packages
#Install and import required packages
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(leaflet)
library(ggpubr)
## Loading required package: ggplot2
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringdist)
library(stringr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks stringdist::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpmisc)
## Loading required package: ggpp
## Registered S3 methods overwritten by 'ggpp':
## method from
## heightDetails.titleGrob ggplot2
## widthDetails.titleGrob ggplot2
##
## Attaching package: 'ggpp'
##
## The following object is masked from 'package:ggplot2':
##
## annotate
##
## Registered S3 method overwritten by 'ggpmisc':
## method from
## as.character.polynomial polynom
library(leaflet.extras)
library(ggplot2)
library(leaftime)
Police Violence Data
# Post Rename Variable Names for mpv
# State County date gender race age
mpv <- read.csv("Mapping Police Violence.csv")
mpv <- mpv %>% select(date, state, county, gender, race, age) %>%
rename(State = state, County = county)
mpv$State <- state.name[match(mpv$State, state.abb)] #changes state id to name
mpv <- mpv%>%
mutate(State = ifelse(State == "Virgina", "Virginia", State)) %>%
mutate(State = ifelse(State == "Massachusetts", "Massachusettes", State))
mpv$date <- substr(mpv$date, nchar(mpv$date) - 3, nchar(mpv$date))
mpv <- mpv %>% filter(date=="2022")
head(mpv)
## date State County gender race age
## 1 2022 Kansas Johnson Male White 27
## 2 2022 Louisiana West Baton Rouge Parish Female White 17
## 3 2022 New Jersey Morris Male White 61
## 4 2022 Louisiana West Baton Rouge Parish Female White 15
## 5 2022 Oklahoma Tulsa Male White 27
## 6 2022 Mississippi Jackson Male Black 41
County Population Data
# Post Rename Variable Names for cpop
# State County ESTIMATESBASE2020 POPESTIMATE2020 POPESTIMATE2021 POPESTIMATE2022
cpop <- read.csv("co-est2022-alldata.csv")
cpop<- cpop %>% select(STNAME, CTYNAME, ESTIMATESBASE2020, POPESTIMATE2020, POPESTIMATE2021, POPESTIMATE2022) %>% rename(State = STNAME, County=CTYNAME) %>%
mutate(State = ifelse(State == "Virgina", "Virginia", State)) %>%
mutate(State = ifelse(State == "Massachusetts", "Massachusettes", State))
Police Training Data
# Post Rename Variable Names for pt
# State basic_training field_training police_min_age
pt <- read_excel("ICJTR+Basic+Training+Data+Set.xlsx")
colnames(pt)[colnames(pt) == '\"POLICE OFFICER MINIMUM AGE\"'] <- "police min age"
pt <- pt %>% select(STATE, `POLICE BASIC TRAINING HRS 2020`, `POLICE FIELD TRAINING`, `police min age`) %>% rename(State = STATE, basic_training = `POLICE BASIC TRAINING HRS 2020`, field_training =`POLICE FIELD TRAINING`, police_min_age = `police min age`)
pt <- pt%>%
mutate(State = ifelse(State == "Virgina", "Virginia", State)) %>%
mutate(State = ifelse(State == "Massachusetts", "Massachusettes", State))
Land Area Data
# Post Rename Variable Names for land_are
# State County SQMI
land_area <- read.delim("2023_Gaz_counties_national.txt")
land_area <- land_area %>% select(USPS,NAME,ALAND_SQMI) %>%
rename(County = NAME, SQMI = ALAND_SQMI, State = USPS)
land_area$State <- state.name[match(land_area$State, state.abb)] #ST_ID to Name
land_area <- land_area%>%
mutate(State = ifelse(State == "Virgina", "Virginia", State)) %>%
mutate(State = ifelse(State == "Massachusetts", "Massachusettes", State))
Merging + New Numerical/Categorical
# Post Merge & Rename Variable Names for pland
# State County SQMI Population Density Denisty_c
# Merge land area data and county population data
pland <- merge(land_area, cpop, by = c('State','County'),
all = TRUE) %>% filter(!is.na(SQMI))
# New Numerical Value 'Population' as Average of Estimates 2020-2022
pland <- pland %>%
mutate(Population = rowMeans(select(
.,
ESTIMATESBASE2020,
POPESTIMATE2020,
POPESTIMATE2021,
POPESTIMATE2022),
na.rm = TRUE))
#New Numerical Value 'Density' as Density of Population by SQ mi Area
pland <- pland %>%
mutate(Density = Population/SQMI)
# New Categorical Value 'Density_c' as Range of population by SQ mi Area
pland <- pland %>%
mutate(Density_c = cut(Population / SQMI,
breaks = c(0, 10, 100, 1000, Inf),
labels = c("sparse", "low-moderate", "moderate-high", "very high"),
include.lowest = TRUE)) %>%
select(State,County,SQMI,Population,Density,Density_c)
#uncomment to check
head(pland)
## State County SQMI Population Density Density_c
## 1 Alabama Autauga County 594.455 59168.25 99.53361 low-moderate
## 2 Alabama Baldwin County 1589.863 237694.00 149.50596 moderate-high
## 3 Alabama Barbour County 885.008 24857.25 28.08703 low-moderate
## 4 Alabama Bibb County 622.470 22214.50 35.68766 low-moderate
## 5 Alabama Blount County 644.891 59207.25 91.80970 low-moderate
## 6 Alabama Bullock County 622.815 10285.00 16.51373 low-moderate
Final Merging
# Post Merge & Rename Variable Names for df_final
# State County SQMI Population Density Density_c Victims
#Sorts mpv and counts the number of victims
mpv2 <- mpv %>%
count(State, County, name = "Victims")
# Remove "County" from County column in Pland if it exists
# Changes to match naming conventions
pland <- pland %>%
mutate(County = str_replace(County, " County$", "") %>% str_trim())
# merge to final data set
df_final <- merge(pland,mpv2, by=c('State','County'),all=TRUE)
# uncomment to check
head(df_final)
## State County SQMI Population Density Density_c Victims
## 1 Alabama Autauga 594.455 59168.25 99.53361 low-moderate NA
## 2 Alabama Baldwin 1589.863 237694.00 149.50596 moderate-high 2
## 3 Alabama Barbour 885.008 24857.25 28.08703 low-moderate NA
## 4 Alabama Bibb 622.470 22214.50 35.68766 low-moderate NA
## 5 Alabama Blount 644.891 59207.25 91.80970 low-moderate 1
## 6 Alabama Bullock 622.815 10285.00 16.51373 low-moderate NA
Summarized data frame for Police Training and Victim Count
# Post Merge & Rename Variable Names for ptmpv
# State basic_training, field_training, police_min_age, Victims
# count deaths by state
mpv3 <- mpv %>%
count(State, name = "Victims")
ptmpv <- merge(pt,mpv3, by='State',all=TRUE) %>%
select(State, basic_training, field_training, police_min_age, Victims) %>%
distinct() %>%
filter(State != "Washington D.C.*") %>%
filter(!is.na(police_min_age))
head(ptmpv)
## State basic_training field_training police_min_age Victims
## 1 Alabama 520 0 19 25
## 2 Alaska 650 40 21 8
## 3 Arizona 585 0 21 58
## 4 Arkansas 520 0 21 15
## 5 California 664 560 18 147
## 6 Colorado 556 0 21 39
National Visualization per 2022
# Replace With STATE county Shapefile Folder
counties <- st_read("USAshapefile/USAshapefile")
## Reading layer `USA_Counties_(Generalized)' from data source
## `/Users/chsu/RStudio/info-201-finalproject-main 3/USAshapefile/USAshapefile'
## using driver `ESRI Shapefile'
## Simple feature collection with 3142 features and 60 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -178.2176 ymin: 18.92179 xmax: -66.96927 ymax: 71.40624
## Geodetic CRS: WGS 84
df_final_temp <- df_final %>%
rename(NAME = County) %>%
group_by(NAME) %>%
select(NAME, Population, Density)
mpv_temp <- mpv %>%
rename(NAME = County) %>%
group_by(NAME) %>%
summarise(Victims = n()) %>%
distinct() %>%
ungroup()
#Dont change anything above this
mpv_temp <- merge(df_final_temp,mpv_temp, by="NAME", all=TRUE)
mpv_temp$NAME <- toupper(mpv_temp$NAME)
counties$NAME <- toupper(counties$NAME)
counties <- counties %>% filter(STATE_NAME!="Alaska")
mpv_temp <- mpv_temp %>%
mutate(VictimRatio = Victims/Population * 1000)
map_data <- merge(counties, mpv_temp, by.x = "NAME", by.y = "NAME", all = FALSE)
map_data <- map_data %>%
mutate(VictimRatio = ifelse((VictimRatio==0), NA , VictimRatio))
leaflet(data = map_data) %>%
addProviderTiles("OpenStreetMap.Mapnik") %>%
addPolygons(
data = map_data,
fillColor = ~colorQuantile("YlOrRd", map_data$VictimRatio, n = 10)(map_data$VictimRatio),,
fillOpacity = 0.7,
color = "lightblue",
weight = 1,
label = ~paste(NAME, "<br>", "Victims per 1000:", formatC(VictimRatio, format = "f", digits = 1))
) %>%
addScaleBar(position = "bottomleft") %>%
addMiniMap(position = "bottomleft") %>%
addControl("zoomSlider", position = "topright") %>%
addFullscreenControl()
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette YlOrRd is 9
## Returning the palette you asked for with that many colors